library(tidyverse)
library(rvest)
library(httr)
library(plotly)
# Load the felony complaint records from the local RDS file.
crime_df = readRDS(file = "./datasets/nyc_felony_crimes.rds")

#### selecting crimes of interest
# pd_cd codes for each crime group, defined once so that the row filter and
# the crime_group classification below can never drift apart.
sex_pd_cd = c(178, 694, 697, 176, 180, 153, 157, 177, 168, 159, 166, 164, 179,
              155, 586, 696)                 # Sex related felony crimes
drug_pd_cd = c(500, 501, 502, 503, 505, 507, 510, 512, 514, 515, 519, 520, 521,
               523, 524, 529, 530, 531, 532, 568, 570)  # Drug related felony crimes
weapon_pd_cd = c(781, 792, 793, 796)         # Weapon related felony crimes

# Keep only the sex-, drug- and weapon-related felonies, convert the
# coordinates to numeric, keep the columns used downstream, and label each
# record with its crime group.
sex_drug_weapons = crime_df %>%
  filter(pd_cd %in% c(sex_pd_cd, drug_pd_cd, weapon_pd_cd)) %>%
  mutate(longitude = as.numeric(longitude),
         latitude = as.numeric(latitude)) %>%
  select(boro_nm, cmplnt_fr_dt, cmplnt_fr_tm, latitude, longitude, ky_cd,
         ofns_desc, pd_cd, pd_desc, vic_race, vic_sex, year, prem_typ_desc) %>%
  mutate(
    # Use an underscore so the value matches the "staten_island" name given to
    # the population table's borough column.
    boro_nm = if_else(boro_nm == "staten island", "staten_island", boro_nm),
    crime_group = case_when(
      pd_cd %in% sex_pd_cd ~ "Sex-Related",
      pd_cd %in% drug_pd_cd ~ "Drug-Related",
      pd_cd %in% weapon_pd_cd ~ "Weapon-Related",
      # Unreachable after the filter above; as.character() keeps every branch
      # the same type so the classification works whether pd_cd is stored as
      # character or numeric.
      TRUE ~ as.character(pd_cd)
    )
  )

The last official census in the USA was in 2010. We will use the United States Census Bureau's 2017 population estimate for each of the years 2014 through 2017, as we do not expect a significant population change over that period.

# Census QuickFacts page comparing New York City and its five boroughs.
url = "https://www.census.gov/quickfacts/fact/table/newyorkcitynewyork,bronxcountybronxboroughnewyork,kingscountybrooklynboroughnewyork,newyorkcountymanhattanboroughnewyork,queenscountyqueensboroughnewyork,richmondcountystatenislandboroughnewyork/PST045217"

# Scrape the first table on the page into a tibble.
nyc_population = read_html(url) %>%
  html_nodes(css = "table") %>%
  .[[1]] %>%
  html_table(header = TRUE) %>%
  as_tibble() %>%
  janitor::clean_names()

# Rename the columns so the borough names match the boro_nm values used in
# the crime data.
names(nyc_population)[1:7] = c("estimate_date", "new_york_city", "bronx", "brooklyn", "manhattan", "queens", "staten_island")

# Reshape to one row per area (boro_nm, population).
nyc_population = nyc_population %>%
  # Keep only the July 1, 2017 estimate row. Any other row in the table would
  # otherwise become an extra (or NA) population per borough and multiply
  # rows in later joins by boro_nm. A prefix match avoids depending on the
  # exact spacing of the label's "(V2017)" suffix.
  filter(startsWith(estimate_date, "Population estimates, July 1, 2017")) %>%
  # Gather only the area columns; the label column is not a borough.
  select(new_york_city:staten_island) %>%
  gather(key = boro_nm, value = population, new_york_city:staten_island) %>%
  # Strip thousands separators before converting to numeric.
  mutate(population = as.numeric(gsub(",", "", population)))

# Fail loudly if the page layout changed, instead of silently producing NA
# crime rates downstream.
if (nrow(nyc_population) == 0) {
  stop("2017 population estimate row not found in the scraped Census table.",
       call. = FALSE)
}

Combined weapon-, sex- and drug-related felony crime rate trends by borough

# Number of selected felonies per borough and year.
grouped_df = sex_drug_weapons %>%
  group_by(boro_nm, year) %>%
  summarise(number = n()) %>%
  ungroup()  # drop the residual boro_nm grouping left by summarise()

# Attach each borough's population and express the count as crimes per
# 100 residents.
full = left_join(grouped_df, nyc_population, by = "boro_nm") %>%
  mutate(crime_rate = (number / population) * 100)

# Yearly crime-rate trend, one line per borough (records without a borough
# are excluded).
x = full %>%
  filter(!is.na(boro_nm)) %>%
  ggplot(aes(x = year, y = crime_rate, color = boro_nm)) +
  geom_point() +
  geom_line(size = 1) +
  # The legend title is set through the colour scale's `name` below.
  labs(x = "Year", y = "Crime rate") +
  viridis::scale_color_viridis(name = "Borough", discrete = TRUE) +
  theme_classic()

# Render the static plot as an interactive plotly widget.
ggplotly(x)

Stratify by crime group to see the trend for each type of crime

# Number of selected felonies per borough, crime group and year.
stratify_df = sex_drug_weapons %>%
  group_by(boro_nm, crime_group, year) %>%
  summarise(number = n()) %>%
  ungroup()  # drop the residual grouping left by summarise()

# Attach each borough's population and express the count as crimes per
# 100 residents.
population_df = left_join(stratify_df, nyc_population, by = "boro_nm") %>%
  mutate(crime_rate = (number / population) * 100)

# Yearly crime-rate trend per borough, one panel per crime group (records
# without a borough are excluded).
population_df %>%
  filter(!is.na(boro_nm)) %>%
  ggplot(aes(x = year, y = crime_rate, color = boro_nm)) +
  geom_point() +
  geom_line(size = 1) +
  facet_grid(~crime_group) +
  # The legend title is set through the colour scale's `name` below.
  labs(x = "Year", y = "Crime rate") +
  viridis::scale_color_viridis(name = "Borough", discrete = TRUE)

The Bronx has consistently had the highest crime rate over the last 4 years for all 3 types of crime. However, we see a steep reduction in the crime rate for the Bronx between 2016 and 2017. Crime rates for all crimes in all boroughs decreased from 2016 to 2017. Surprisingly, Brooklyn has seen a consistent rise in the rate of drug-related felonies from 2015 to 2017.